clear all

// This file uses candidate-level data to create a district-year-level dataset for the upper house, to be merged with election data in 2_merge_election_data.do

// Inputs are candidate-level state legislator election results (1967-2016) from the Klarner Harvard Dataverse
	// Accessed here: https://dataverse.harvard.edu/file.xhtml?persistentId=doi:10.7910/DVN/3WZFK9/JOH4DS&version=3.0
// and districts per state, listed in ~/input/districtsperstate.xlsx (read below as $InputPath/districtsperstate.csv):
	// Accessed here:  http://www.ncsl.org/research/about-state-legislatures/number-of-legislators-and-length-of-terms.aspx

// Load the candidate-level election results file
import delimited "$InputPath/196slers1967to2016_20180908.tab", clear

// Normalize state abbreviations to upper case so string comparisons below are reliable
replace sab = upper(sab)

// Restrict to the upper house; Nebraska is excluded
drop if sen != 1 | sab == "NE"

// Assign each election to the reference year it feeds.
// Each recent_XX flag marks the window of election years whose winners are
// treated as in office during reference year 20XX:
//   recent_07 : elections 2000-2004  -> year 2007
//   recent_12 : elections 2006-2010  -> year 2012
//   recent_17 : elections 2011-2014  -> year 2017

gen recent_07 = (year<=2004 & year>=2000)
gen recent_12 = (year<=2010 & year>=2006)
gen recent_17 = (year<=2014 & year>=2011)

// Latest election year observed in each window, by state and district.
// NOTE(review): the filter on most_recent_* below is commented out, so every
// election inside a window is kept and most_recent_* is computed but dropped
// unused -- confirm this is intended before the district-year collapse.
foreach i in 07 12 17 {
	egen most_recent_`i'=max(year) if recent_`i'==1, by(sab ddez)
}

keep if recent_07==1 | recent_12==1 | recent_17==1
//keep if most_recent_07==year | most_recent_12==year | most_recent_17==year

// trim years that are not within time period, but tagged as most recent due to uncontested elections
drop if year<2002 & sab!="TX"

// Keep the actual election year as eyear; year becomes the reference year.
// Fix: the 2007 and 2017 labels were previously swapped relative to the
// recent_07 / recent_17 windows defined above.
rename year eyear
gen     year=2007 if recent_07==1	// in office for fiscal year 2006-2007
replace year=2012 if recent_12==1	// in office for fiscal year 2011-2012
replace year=2017 if recent_17==1	// in office for fiscal year 2016-2017


// recent* does not match the most_recent_* names, so both patterns are needed
drop most_recent* recent*

// Restrict to the deciding contest in each race:
//   - general elections (etype "g") everywhere;
//   - in Louisiana, the second round ("sfunset"/"srunoff") when one exists
//     for the state-district-year, otherwise the non-second-round records.
gen secondround = inlist(etype, "sfunset", "srunoff")
egen exists_secondround = total(secondround), by(sab ddez year)

keep if etype=="g" ///
	| (sab=="LA" & ((exists_secondround==0 & !secondround) | (exists_secondround>0 & secondround)))

// candidates with only a few votes were coded with name "SCATTERING" in original files, or as 'write-in' in updated files
keep if partyz != "writein"


// Give the source variables the names used in the rest of the pipeline
rename sfips fstate
rename month electmonth
rename ddez  sldu
rename dtype multimember
rename eseats nmember
rename term  termlength
rename exper incumbent
rename out   winner

// Value labels for the district-type variable
label define memberlabel 1 "Single member" 2 "Multimember with posts" 3 "Multimember free for all"
label values multimember memberlabel

// Louisiana `eseats' variable is coded differently due to runoffs, but we want the number of elected members; see codebook.
replace nmember = 1 if fstate == 22

// Convert district identifiers to numeric where every value is numeric
destring sldu, replace

// Per-candidate party indicators (summed to candidate counts at the collapse step)
gen ndemcand   = partyz == "d"
gen nrepcand   = partyz == "r"
gen nothercand = !inlist(partyz, "d", "r")

// Incumbent-running flag and per-party winner indicators
gen incumbentran = (incumbent == "inc")
gen demwon = ndemcand   * (winner == "w")
gen repwon = nrepcand   * (winner == "w")
gen indwon = nothercand * (winner == "w")


// Attach state identifiers by abbreviation and keep only matched states
statastates, a(sab)
keep if _merge == 3
drop _merge

rename sab state_abbrev
rename state_name state

// Preserve the original district identifier before it is recoded
gen slduname = sldu

// replace string districts with code
do "$CodePath/elections_csld_edits/slducodes.do"

//drop if real(sldu)==. // some district names are not identified in Census data
destring sldu, replace

// Stash the candidate-level data for the merge with districts per state
tempfile base
save "`base'", replace

// Load the districts-per-state table and attach it to the candidate data
clear
import delimited "$InputPath/districtsperstate.csv", clear

// The state-name column arrives with a one-character prefix; normalize it to `state'
rename ?state state
statastates, name(state)

// keep matched states only (drop total and DC)
drop if _merge != 3
drop _merge state_abbrev state
rename state_fips fstate

// all merge except Nebraska (one house legislature)
merge 1:m fstate using "`base'", nogenerate


// Reduce to one record per candidate within year-state-district: some candidates
// are listed twice if they are nominated by multiple parties (e.g., NY) or in
// multiple counties (e.g., WY)
egen tag = tag(year fstate sldu candid)
keep if tag

// Legacy candidate-level collapse, kept for reference:
//collapse (mean) totaldistricts* *term  ndemcand nrepcand nothercand electmonth multimember nmember floterial_nmember termlength (sum) demwon repwon indwon incumbentran, by(year fstate sldl candid)

// Collapse to district-year level: district attributes are averaged,
// candidate counts and winner/incumbent indicators are summed
collapse ///
	(mean) eyear totaldistricts* *term electmonth multimember nmember termlength ///
	(sum)  ndemcand nrepcand nothercand demwon repwon indwon incumbentran, ///
	by(year fstate sldu)

// Write the result into the output directory (this changes the working directory)
cd "$OutputPath/"
save "statelegislators_upper", replace





